This dataset [1] is downloaded from https://zenodo.org/record/5512749#.Yd3H0dHMKUl. The Baysor Cell segmentation and the author generated cell type assignment are used in the analysis.
# Load the per-molecule segmentation table.
Spacial.information <- read.csv('segmentation.csv.gz', header = TRUE)

# Drop molecules flagged as noise.
# read.csv() converts a column that contains only "true"/"false" into a logical
# vector; comparing that directly with the string 'true' never matches
# ("TRUE" != "true"), so no row would be removed. Normalising the flag to
# lower-case text makes the filter work for both logical and character input.
is_noise_flag <- tolower(as.character(Spacial.information$is_noise))
Spacial.information <- Spacial.information[is_noise_flag != 'true', ]

# Cell IDs are handled as character keys from here on.
Spacial.information$cell <- as.character(Spacial.information$cell)

# Per-cell statistics and the cell type assignment; in both files the first
# column is used as row names.
Cell_spacial_information <- read.csv('segmentation_cell_stats.csv.gz',
                                     header = TRUE, row.names = 1)
Celltype <- read.csv('cell_assignment.csv.gz', header = TRUE, row.names = 1)
colnames(Celltype) <- c('Celltype')

# Join the two tables on their row names (by = 0). merge() returns the key as
# the first column of the result.
Cell_spacial_information <- merge(Cell_spacial_information, Celltype, by = 0)

# Prefix every column with 'cell_' (giving e.g. cell_x, cell_y, cell_Celltype),
# then give the key column (first column) the plain name 'cell'.
colnames(Cell_spacial_information) <- paste0('cell_', colnames(Cell_spacial_information))
colnames(Cell_spacial_information)[1] <- 'cell'

# Discard cells whose assignment is 'Removed'.
Cell_spacial_information <- Cell_spacial_information[
  Cell_spacial_information$cell_Celltype != 'Removed',
]
# Colour ramp built from the RColorBrewer "Dark2" palette. Dark2 defines at most
# 8 colours, so request 8: asking for 9 yields the same 8 colours but emits a
# "n too large" warning on every run.
getPalette <- colorRampPalette(brewer.pal(8, "Dark2"))
# One interpolated colour per distinct cell type.
color <- getPalette(length(unique(Cell_spacial_information$cell_Celltype)))
# Scatter plot of the cell centroids, coloured by cell type, on a black canvas.
ggplot(
  data = Cell_spacial_information,
  mapping = aes(x = cell_x, y = cell_y, colour = cell_Celltype)
) +
  geom_point() +
  # Axis limits span the full extent of the centroid coordinates.
  xlim(range(Cell_spacial_information$cell_x)) +
  ylim(range(Cell_spacial_information$cell_y)) +
  scale_color_manual(values = color) +
  theme_bw() +
  theme(
    # No grid lines.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Black plot and panel background.
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    # Legend drawn white-on-black. To place it horizontally inside the panel,
    # add: legend.position = c(0.6, 0.07), legend.direction = "horizontal"
    legend.background = element_rect(fill = "black", color = NA),
    legend.key = element_rect(color = "gray", fill = "black"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
# Keep only molecules that belong to a retained cell.
# The cell IDs are stored in the `cell` column of Cell_spacial_information; its
# row names are merely the row indices left over from merge()/subsetting, so
# matching against rownames() would select an unrelated set of IDs.
retained_cells <- as.character(Cell_spacial_information$cell)
Spacial.information <- Spacial.information[Spacial.information$cell %in% retained_cells, ]

# Attach the per-cell columns (cell_x, cell_y, cell_Celltype, ...) to every molecule.
Spacial.information <- merge(Spacial.information, Cell_spacial_information, by = 'cell')

# Rename the cell type column (cell_Celltype) to plain 'Celltype' for plotting.
colnames(Spacial.information)[grepl('Celltype', colnames(Spacial.information))] <- 'Celltype'
# Scatter plot of every molecule, drawn as a single pixel and coloured by the
# cell type of the cell it is assigned to, on a black canvas.
ggplot(
  data = Spacial.information,
  mapping = aes(x = x, y = y, colour = Celltype)
) +
  geom_point(shape = '.') +
  # Axis limits span the full extent of the molecule coordinates.
  xlim(range(Spacial.information$x)) +
  ylim(range(Spacial.information$y)) +
  scale_color_manual(values = color) +
  theme_bw() +
  theme(
    # No grid lines.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Black plot and panel background.
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    # Legend drawn white-on-black. To place it horizontally inside the panel,
    # add: legend.position = c(0.6, 0.07), legend.direction = "horizontal"
    legend.background = element_rect(fill = "black", color = NA),
    legend.key = element_rect(color = "gray", fill = "black"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
By looking at the visualization results from Part 1, we can see that the cellular composition of different parts of the tissue are different.
Can we use the unsupervised clustering algorithm to find different parts of the tissue?
Time to find out.
This function will break the Image into multiple small fragments. and calculate the cell composition in each small fragments.
Logic:
The whole image is broken into small fragments.
for each small fragment:
for each cell has overlap with the small fragment:
if the cell has more than x% (x determined by the user) of cell's probes'areas in the small fragment, the cell is determined to be in the small fragment.
Cell composition of the small fragment is determined by the cells assigned to it.
The percentage threshold and the size of small fragment can be determined by the user.
One tip: if the number of fragments is increased, please decrease the proportion_threshold. Otherwise many area will be deemed as no cells.
# Area_Calculator(), Area_Counter() and Area_Cluster() are expected to come from
# this helper script.
source('functions.R')

# Cell composition per fragment, derived from the molecule (probe) positions.
# Positional arguments: x minimum, x maximum, y minimum, y maximum of the
# molecules, the value 50, and the molecule table.
# NOTE(review): 50 is presumably the fragment count/size - confirm in functions.R.
Area_Calculated_from_Probe <- Area_Calculator(
  min(Spacial.information$x),
  max(Spacial.information$x),
  min(Spacial.information$y),
  max(Spacial.information$y),
  50,
  Spacial.information,
  proportion_threshold = 0.5
)

# Centroid table with the plain column names x, y, cell and Celltype.
cell_coodinates <- with(
  Cell_spacial_information,
  data.frame(x = cell_x, y = cell_y, cell = cell, Celltype = cell_Celltype)
)

# Cell composition per fragment, derived from the centroid coordinates only.
Area_Calculated_from_Coordinates <- Area_Counter(
  min(cell_coodinates$x),
  max(cell_coodinates$x),
  min(cell_coodinates$y),
  max(cell_coodinates$y),
  50,
  cell_coodinates
)
In this case I used the Leiden Community detection Algorithm from igraph package to cluster the area.
# Cluster the fragments by their cell composition, once for the probe-based
# areas and once for the centroid-based areas, using identical settings.
ClusterfromProbe_results <- Area_Cluster(
  Area_Calculated_from_Probe,
  ScaleFactor = 1e6,
  resolution_parameter = 0.5
)
## Building SNN based on a provided distance matrix
## Computing SNN
ClusterfromCoordinates_results <- Area_Cluster(
  Area_Calculated_from_Coordinates,
  ScaleFactor = 1e6,
  resolution_parameter = 0.5
)
## Building SNN based on a provided distance matrix
## Computing SNN
# ---- Results of the probe-based clustering ----
Area.Cluster.assignment <- ClusterfromProbe_results$Area.Cluster.assignment
Clustering.Distribution <- ClusterfromProbe_results$Clustering.Distribution

# Reshape the per-cluster table to long format (one row per cluster / cell type
# pair). The intermediate table gets its own name instead of `melt`, so it no
# longer masks the melt() function it is passed to.
cluster_composition <- Area.Cluster.assignment
cluster_composition$cluster <- as.factor(cluster_composition$cluster)
cluster_composition <- melt(cluster_composition, id.vars = 'cluster')
colnames(cluster_composition) <- c('Cluster', 'CellType', 'Cells')

# Horizontal bars, one per cluster; position = 'fill' scales each bar to
# proportions, so the bars show the cell type composition of each cluster.
Cluster_cell_composition <- ggplot(cluster_composition,
                                   aes(x = Cells, y = Cluster, fill = CellType)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_fill_manual(values = color) +
  theme_bw()

# Cell centroids coloured by cell type, without legend or grid lines, for
# side-by-side comparison with the cluster map.
centroid.graph <- ggplot(Cell_spacial_information,
                         aes(x = cell_x, y = cell_y, colour = cell_Celltype)) +
  geom_point() +
  xlim(min(Cell_spacial_information$cell_x), max(Cell_spacial_information$cell_x)) +
  ylim(min(Cell_spacial_information$cell_y), max(Cell_spacial_information$cell_y)) +
  scale_color_manual(values = color) +
  theme_bw() +
  xlab('x') +
  ylab('y') +
  # No trailing comma after the last argument: an empty trailing argument is
  # not guaranteed to be accepted by theme().
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Plot object returned by Area_Cluster() for the clustered areas.
Clustering.Distribution.plot <- ClusterfromProbe_results$Clustering.Distribution.plot

# Overview of all three plots in a two-column grid, followed by each plot on
# its own at full size.
grid.arrange(Clustering.Distribution.plot, centroid.graph, Cluster_cell_composition, ncol = 2)
Clustering.Distribution.plot
centroid.graph
Cluster_cell_composition
# ---- Results of the coordinate-based clustering ----
Area.Cluster.assignment <- ClusterfromCoordinates_results$Area.Cluster.assignment
Clustering.Distribution <- ClusterfromCoordinates_results$Clustering.Distribution

# Reshape the per-cluster table to long format (one row per cluster / cell type
# pair). The intermediate table gets its own name instead of `melt`, so it no
# longer masks the melt() function it is passed to.
cluster_composition <- Area.Cluster.assignment
cluster_composition$cluster <- as.factor(cluster_composition$cluster)
cluster_composition <- melt(cluster_composition, id.vars = 'cluster')
colnames(cluster_composition) <- c('Cluster', 'CellType', 'Cells')

# Horizontal bars, one per cluster; position = 'fill' scales each bar to
# proportions, so the bars show the cell type composition of each cluster.
Cluster_cell_composition <- ggplot(cluster_composition,
                                   aes(x = Cells, y = Cluster, fill = CellType)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_fill_manual(values = color) +
  theme_bw()

# Cell centroids coloured by cell type, without legend or grid lines, for
# side-by-side comparison with the cluster map.
centroid.graph <- ggplot(Cell_spacial_information,
                         aes(x = cell_x, y = cell_y, colour = cell_Celltype)) +
  geom_point() +
  xlim(min(Cell_spacial_information$cell_x), max(Cell_spacial_information$cell_x)) +
  ylim(min(Cell_spacial_information$cell_y), max(Cell_spacial_information$cell_y)) +
  scale_color_manual(values = color) +
  theme_bw() +
  xlab('x') +
  ylab('y') +
  # No trailing comma after the last argument: an empty trailing argument is
  # not guaranteed to be accepted by theme().
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Plot object returned by Area_Cluster() for the clustered areas.
Clustering.Distribution.plot <- ClusterfromCoordinates_results$Clustering.Distribution.plot

# Overview of all three plots in a two-column grid, followed by each plot on
# its own at full size.
grid.arrange(Clustering.Distribution.plot, centroid.graph, Cluster_cell_composition, ncol = 2)
Clustering.Distribution.plot
centroid.graph
Cluster_cell_composition
Coming soon.
[1] Moffitt, Jeffrey, Xu, Rosalind, Kharchenko, Peter, Petukhov, Viktor, Cadinu, Paolo, Soldatov, Ruslan, & Khodosevich, Konstantin. (2021). MERFISH measurements in the mouse ileum [Data set]. https://doi.org/10.5061/dryad.jm63xsjb2